{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# GPU: 32*40 in 10.9s = 117/s\n",
    "# CPU: 32*8 in 31.2s = 8/s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OS:  linux\n",
      "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
      "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
      "Numpy:  1.13.3\n",
      "CNTK:  2.2\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import sys\n",
    "import numpy as np\n",
    "import cntk as C\n",
    "from cntk import load_model, combine\n",
    "print(\"OS: \", sys.platform)\n",
    "print(\"Python: \", sys.version)\n",
    "print(\"Numpy: \", np.__version__)\n",
    "print(\"CNTK: \", C.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6\r\n"
     ]
    }
   ],
   "source": [
    "!cat /proc/cpuinfo | grep processor | wc -l"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "name\r\n",
      "Tesla K80\r\n"
     ]
    }
   ],
   "source": [
    "!nvidia-smi --query-gpu=gpu_name --format=csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "BATCH_SIZE = 32\n",
    "RESNET_FEATURES = 2048\n",
    "BATCHES_GPU = 40\n",
    "BATCHES_CPU = 8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def give_fake_data(batches):\n",
    "    \"\"\" Create an array of fake data to run inference on\"\"\"\n",
    "    np.random.seed(0)\n",
    "    dta = np.random.rand(BATCH_SIZE*batches, 224, 224, 3).astype(np.float32)\n",
    "    return dta, np.swapaxes(dta, 1, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def yield_mb(X, batchsize):\n",
    "    \"\"\" Function yield (complete) mini_batches of data\"\"\"\n",
    "    for i in range(len(X)//batchsize):\n",
    "        yield i, X[i*batchsize:(i+1)*batchsize]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1280, 224, 224, 3) (1280, 3, 224, 224)\n"
     ]
    }
   ],
   "source": [
    "# Create batches of fake data\n",
    "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCHES_GPU)\n",
    "print(fake_input_data_cl.shape, fake_input_data_cf.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "#%%bash\n",
    "#wget https://www.cntk.ai/Models/CNTK_Pretrained/ResNet50_ImageNet_CNTK.model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Features (penultimate layer)\n",
    "node_name = \"z.x\"\n",
    "model_file = \"ResNet50_ImageNet_CNTK.model\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict_fn(classifier, data, batchsize):\n",
    "    \"\"\" Return features from classifier \"\"\"\n",
    "    out = np.zeros((len(data), RESNET_FEATURES), np.float32)\n",
    "    for idx, dta in yield_mb(data, batchsize):\n",
    "        pred = classifier.eval(dta)\n",
    "        out[idx*batchsize:(idx+1)*batchsize] = pred[0].squeeze()\n",
    "    return out"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. GPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load model\n",
    "loaded_model  = load_model(model_file)\n",
    "node_in_graph = loaded_model.find_by_name(node_name)\n",
    "output_nodes  = combine([node_in_graph.owner])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "fake_input_data_cf = np.ascontiguousarray(fake_input_data_cf)\n",
    "cold_start = predict_fn(output_nodes, fake_input_data_cf, BATCH_SIZE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 7.03 s, sys: 1.6 s, total: 8.62 s\n",
      "Wall time: 10.9 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# GPU: 10.9s\n",
    "features = predict_fn(output_nodes, fake_input_data_cf, BATCH_SIZE)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. CPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# HACK -> have to manually restart notebook and rerun\n",
    "# Otherwise runs on GPU!!!!!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Kill all GPUs ...\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '-1'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load model\n",
    "loaded_model  = load_model(model_file)\n",
    "node_in_graph = loaded_model.find_by_name(node_name)\n",
    "output_nodes  = combine([node_in_graph.owner])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(256, 224, 224, 3) (256, 3, 224, 224)\n"
     ]
    }
   ],
   "source": [
    "# Create batches of fake data\n",
    "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCHES_CPU)\n",
    "print(fake_input_data_cl.shape, fake_input_data_cf.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "fake_input_data_cf = np.ascontiguousarray(fake_input_data_cf)\n",
    "cold_start = predict_fn(output_nodes, fake_input_data_cf, BATCH_SIZE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2min 46s, sys: 2.22 s, total: 2min 48s\n",
      "Wall time: 31.2 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# CPU: 31.2s\n",
    "features = predict_fn(output_nodes, fake_input_data_cf, BATCH_SIZE)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
